library(tidyverse)
library(lubridate)
# Load the three raw CSV exports: issues, stargazers and user profiles.
dt_issues <- read.csv("data/issues_data.csv", header = TRUE)
dt_star <- read.csv("data/stargazers.csv", header = TRUE)
dt_user <- read.csv("data/users_data.csv", header = TRUE)

# Preview each dataset by printing its first 10 rows.
head(dt_issues, 10)
head(dt_star, 10)
head(dt_user, 10)
# User dataset cleaning
# Builds `dt_user_cld`, the cleaned user table every later step reads from.
dt_user_cld <- dt_user %>%
  # Attach the issue columns to each user row, matching on the "X_id" key.
  left_join(dt_issues, by = "X_id") %>%
  # Keep only the fields analysed below. Columns present in both inputs get a
  # ".x" suffix for the copy coming from the left-hand (user) table.
  select(
    bio, blog, company, created_at.x, followers, following, hireable,
    location, login, name, public_gists, type, closed_at, updated_at.x,
    email, organizations_url, public_repos
  ) %>%
  # Free-text fields may have been read as factors; store them as plain strings.
  mutate(
    bio = as.character(bio),
    blog = as.character(blog),
    company = as.character(company),
    email = as.character(email),
    location = as.character(location),
    login = as.character(login),
    name = as.character(name),
    organizations_url = as.character(organizations_url)
  ) %>%
  # Parse the timestamp text into date-time columns; the raw ".x" columns are
  # kept alongside the parsed ones.
  mutate(
    created_at = lubridate::ymd_hms(created_at.x),
    updated_at = lubridate::ymd_hms(updated_at.x)
  )
# Output: Column `X_id` joining factors with different levels, coercing to character vector
# Print the structure of the cleaned user table (column names, types and the
# first few values of each column) to verify the conversions above.
str(dt_user_cld)
'data.frame': 39987 obs. of 19 variables:
$ bio : chr "" "" "" "" ...
$ blog : chr "" "" "" "" ...
$ company : chr "" "" "" "" ...
$ created_at.x : Factor w/ 39971 levels "2008-03-26T03:33:42Z",..: 23111 24348 13701 27767 34585 17517 19613 22254 23043 18383 ...
$ followers : int 9 4 7 2 0 2 13 5 1 0 ...
$ following : int 16 38 14 89 0 0 21 126 48 1 ...
$ hireable : Factor w/ 2 levels "","True": 1 1 1 1 1 1 1 1 1 1 ...
$ location : chr "" "" "" "" ...
$ login : chr "moloach" "bhxch" "YueNing" "BigFaceCatMhc" ...
$ name : chr "" "Zhe Lee" "naodongbanana" "" ...
$ public_gists : int 0 0 0 0 0 0 1 24 0 0 ...
$ type : Factor w/ 1 level "User": 1 1 1 1 1 1 1 1 1 1 ...
$ closed_at : logi NA NA NA NA NA NA ...
$ updated_at.x : Factor w/ 38705 levels "2015-10-22T09:47:56Z",..: 14522 14681 25825 31448 31450 37456 23803 31101 4862 19705 ...
$ email : chr "" "mytempbh@outlook.com" "n1085633848@outlook.com" "" ...
$ organizations_url: chr "https://api.github.com/users/moloach/orgs" "https://api.github.com/users/bhxch/orgs" "https://api.github.com/users/YueNing/orgs" "https://api.github.com/users/BigFaceCatMhc/orgs" ...
$ public_repos : int 10 34 29 4 0 26 93 102 13 2 ...
$ created_at : POSIXct, format: "2016-07-12 05:17:50" "2016-08-27 14:04:23" "2015-06-19 13:57:11" "2017-01-14 03:30:31" ...
$ updated_at : POSIXct, format: "2019-03-09 00:15:05" "2019-03-09 09:30:45" "2019-03-25 10:50:50" "2019-03-28 00:02:38" ...
# Number of users per self-reported company string, most common first.
# Blank company entries are excluded.
company_info <- dt_user_cld %>%
  filter(company != "") %>%
  group_by(company) %>%
  summarise(count = n()) %>%
  arrange(desc(count))

# Display the top companies.
company_info

# Display the company list (raw spellings, in descending order of user count).
print(company_info$company)
[1] "Tencent" "Baidu"
[3] "Alibaba" "Zhejiang University"
[5] "JD" "Tsinghua University"
[7] "baidu" "Shanghai Jiao Tong University"
[9] "UESTC" "Peking University"
[11] "Nanjing University" "None"
[13] "null" "SJTU"
[15] "none" "China"
[17] "Netease" "@Alibaba"
[19] "BUPT" "USTC"
[21] "Wuhan University" "ZJU"
[23] "alibaba" "NetEase"
[25] "ThoughtWorks" "@Tencent"
[27] "Fudan University" "Harbin Institute of Technology"
[29] "HUST" "meituan"
[31] "NULL" "Bytedance"
[33] "Xidian University" "百度"
[35] "@alibaba" "Alibaba Group"
[37] "Beijing Institute of Technology" "JD.COM"
[39] "Alibaba Inc." "Alipay"
[41] "baidu.com" "Beihang University"
[43] "Beijing University of Posts and Telecommunications" "Carnegie Mellon University"
[45] "JD.com" "NJU"
[47] "SYSU" "DiDi"
[49] "eleme" "free"
[51] "Freelancer" "Google"
[53] "Microsoft" "no"
[55] "South China University of Technology" "tencent"
[57] "@bytedance" "@Microsoft "
[59] "@tencent" "Baidu, Inc."
[61] "BIT" "BUAA"
[63] "china" "East China Normal University"
[65] "HIT" "Huawei"
[67] "Neusoft" "Southeast University"
[69] "Sun Yat-sen University" "Tongji University"
[71] "Xiamen University" "Xiaomi"
[73] "360" "ByteDance"
[75] "ECNU" "Home"
[77] "Huazhong University of Science and Technology" "ICT"
[79] "inspur" "Meitu"
[81] "Meituan" "Null"
[83] "Qihoo 360" "SCUT"
[85] "Sichuan University" "student"
[87] "Student" "WHU"
[89] "xiaomi" "ZTE"
[91] "滴滴出行" "@baidu "
[93] "@Meituan-Dianping" "@Microsoft"
[95] "@XiaoMi" "Alibaba Cloud"
[97] "alipay" "Ant Financial"
[99] "Beijing Jiaotong University" "bilibili"
[101] "bupt" "Cheetah Mobile"
[103] "CN" "Dalian University of Technology"
[105] "Didi" "IBM"
[107] "Intel" "jd"
[109] "jd.com" "LeetCode"
[111] "Lenovo" "Nankai University"
[113] "netease" "Oracle"
[115] "ShanghaiTech University" "SUSE"
[117] "SUSTech" "TAL"
[119] "THU" "Weibo"
[121] "小红书" "无"
[123] "美团点评" "@Alipay"
[125] "@baidu" "@eleme"
[127] "@meituan" "@meituan-dianping"
[129] "@Netease" "@ZangaiFamily "
[131] "2dfire" "asiainfo"
[133] "beijing" "Bilibili"
[135] "bytedance" "CASIA"
[137] "CETC" "China Telecom"
[139] "Chongqing University" "Columbia University"
[141] "CQUPT" "dianping.com"
[143] "Freelance" "GDUT"
[145] "Grab" "HKUST"
[147] "iflytek" "Iflytek"
[149] "Inspur" "iqiyi"
[151] "Jilin University" "Kingsoft"
[153] "Lianjia" "MIT"
[155] "MTDP" "Nanyang Technological University"
[157] "NCEPU" "NetEase Games"
[159] "no company" "nothing"
[161] "NWPU" "NYU"
[163] "personal" "Qiniu"
[165] "Qunar" "SHU"
[167] "Sina" "suning"
[169] "Tencent Inc." "Tianjin University"
[171] "Tsinghua" "UCAS"
[173] "UCloud" "University of Edinburgh"
[175] "University of Electronic Science and Technology of China" "University of Minnesota"
[177] "University of Southern California" "University of Washington"
[179] "unknown" "UW-Madison"
[181] "VIPKID" "Virginia Tech"
[183] "ximalaya" "XMU"
[185] "Youzan" "YY Inc."
[187] "zte" "ʕ•̫͡•ʔ-̫͡-ʕ•͡\u0353•ʔ-̫͡-ʔ"
[189] "京东" "今日头条"
[191] "大搜车" "好未来"
[193] "无业游民" "自由职业"
[195] "阿里巴巴" "?"
[197] "." "..."
[199] "@" "@alibaba "
[201] "@alipay" "@apache"
[203] "@b3log " "@Baidu"
[205] "@banggood" "@bearyinnovative "
[207] "@bilibili" "@Bilibili"
[209] "@BISTU" "@chaitin"
[211] "@ctripcorp" "@google"
[213] "@iqiyi" "@justice-code "
[215] "@Kyligence " "@Mobike"
[217] "@MobileNowGroup " "@Muxi-Studio"
[219] "@myteksi " "@netease"
[221] "@NetEase" "@pingcap "
[223] "@RedrockTeam " "@ruguoapp "
[225] "@Seniverse" "@TalkingData "
[227] "@thoughtworks" "@ThoughtWorks"
[229] "@Tradeshift " "@wacai"
[231] "@weibocom" "@xiachufang "
[233] "@xiaomi" "@youzan"
[235] "@youzan " "@zaihui"
[237] "@zhihu" "***"
[239] "~" "1"
[241] "360企业安全" "a"
[243] "Aalto University" "ABC"
[245] "Alauda" "alibaba-inc"
[247] "Alibaba.inc" "Amazon"
[249] "arxanfintech" "Asiainfo"
[251] "Atommatrix" "Beijing"
[253] "Beijing Forestry University" "Beijing Normal University"
[255] "Beijing,China" "BISTU"
[257] "BJTU" "bonc"
[259] "Booking.com" "Bytedance Inc."
[261] "Bytedance.Inc" "Camera360"
[263] "CC" "Central South University"
[265] "China Mobile Communications Corporation" "Chinese Academy of Sciences"
[267] "Cisco" "CMB"
[269] "cmcm" "CMU"
[271] "CQU" "creditease"
[273] "csust" "ctrip"
[275] "Ctrip" "Ctrip.com"
[277] "Dianping" "didi"
[279] "DIDI" "DLUT"
[281] "DotC United Group" "Douban Inc."
[283] "DZH" "eBay"
[285] "ele.me" "ELEME Inc."
[287] "elong" "FE"
[289] "Freedom" "freelance"
[291] "freelancer" "Georgetown University"
[293] "google" "gyyx"
[295] "GZHU" "Hand"
[297] "Hangzhou Dianzi University" "HDU"
[299] "HFUT" "Hohai University"
[301] "home" "Horizon Robotics"
[303] "HP" "https://github.com/eleme"
[305] "Huazhong University of Science & Technology" "hundsun"
[307] "hust" "HW"
[309] "ICT, CAS" "Indie Developer"
[311] "Infosys" "JNU"
[313] "jumei" "Kingdee"
[315] "Kingnet" "Kwai"
[317] "liulishuo" "LLS"
[319] "Magic" "Mars"
[321] "Megvii" "Meili-inc"
[323] "meituan.com" "meizu"
[325] "Microsoft Corporation" "mistong"
[327] "MOGU" "Momenta"
[329] "N/A" "NaN"
[331] "Nanchang University" "National University of Singapore"
[333] "ND" "NEUQ"
[335] "NIO" "No"
[337] "NO" "Northwestern Polytechnical University"
[339] "NUAA" "Open to Opportunities"
[341] "oraro" "OSU"
[343] "out of work" "pinduoduo"
[345] "pingan" "PingAn"
[347] "PKU" "PPTV"
[349] "PWRD" "Qihoo360"
[351] "Qudian" "Renmin University of China"
[353] "RingCentral" "Rutgers"
[355] "saic" "Samsung"
[357] "Sankuai" "school"
[359] "SCNU" "SCP Foundation"
[361] "sensetime" "SenseTime"
[363] "Shanbay" "Shandong University"
[365] "Shanghai" "Shanghai Jiao Tong Univ."
[367] "Shanghai Jiaotong University" "ShenZhen University"
[369] "SiChuan University" "sina"
[371] "SMZDM" "Soochow University"
[373] "souche.com" "South China Normal University"
[375] "Splunk" "Sun Yat-Sen University"
[377] "SUT" "SWJTU"
[379] "TalkingData" "TCL"
[381] "The Hong Kong Polytechnic University" "The NetCircle"
[383] "Tsinghua university" "TsingHua University"
[385] "ucas" "Uestc"
[387] "undefined" "University of Melbourne"
[389] "UNSW" "upyun"
[391] "USTB" "vip.com"
[393] "vipkid" "vipshop"
[395] "weixin" "www.iflytek.com"
[397] "wx" "xiaoi.com"
[399] "youzan" "YY.Inc"
[401] "Zhejiang University of Technology" "Zhihu"
[403] "Zhihu Inc." "zju"
[405] "上海灵娱网络科技" "中国"
[407] "二维火" "保密"
[409] "华为技术有限公司" "同花顺"
[411] "小米科技" "微店"
[413] "欢聚时代" "百姓网"
[415] "百度外卖" "重庆邮电大学"
[417] " @clustar.ai" " BaiFenDian Information Technology CO., LTD."
[419] " Harbin Institute of Technology, Zhejiang University" " NEC"
[421] " nugget" " TG"
[423] " ViceCity @ChillingEffect" " 上海奕明文化传媒有限公司"
[425] " 为中国孱弱的技术, 撑起一片自立自强的天空。" " 众安在线财产保险股份有限公司"
[427] " 无" "-"
[429] "--" "---"
[431] ": )" "...."
[433] ".NET" "(╯' - ')╯︵ ┻━┻ "
[435] "(주)에어텔닷컴" "(保密)"
[437] "(湖南)大农科技股份有限公司" "[Snriud] Co,ltd"
[439] "{{company}}" "『82.99 F.M』"
[441] "@ " "@ NUAA"
[443] "@01org" "@0x8023 "
[445] "@2048li" "@2345"
[447] "@24OI" "@24OI "
[449] "@258ch @ApacheCN " "@360"
[451] "@3rdStone @apachecn " "@4paradigm"
[453] "@51nb" "@55haitao"
[455] "@75team" "@7moor.com"
[457] "@811Noobs @wenon-dev " "@Accedo-Products "
[459] "@acgzone" "@ACLoong "
[461] "@AI" "@aimacity "
[463] "@airbnb " "@AirLoft "
[465] "@airteltour " "@alauda"
[467] "@alibaba @kubernetes" "@alibaba @ucweb"
[469] "@Alibaba International UED" "@alibabapictures"
[471] "@Alipay @macacajs" "@aliyun"
[473] "@Alkaids " "@alo7 "
[475] "@amazon.com" "@ample-cosplay "
[477] "@ampproject " "@android-plugin "
[479] "@AndroidConcentrationCamp" "@ant-design "
[481] "@Aobeef " "@apache "
[483] "@apachecn " "@apoiase"
[485] "@apollo-rescue" "@apple "
[487] "@AriadneThread " "@ARM"
[489] "@arqamfc" "@Asiainfo"
[491] "@Asuri-Team " "@athm-fe "
[493] "@australiaitgroup " "@AzukiCloud "
[495] "@b3log @FangStarNet " "@BaaSCMIoT"
[497] "@Babylonpartners " "@Baoban "
[499] "@BayatGames " "@bendcap"
[501] "@BiJie " "@bilibili "
[503] "@Bilibili bilibili" "@BIMK "
[505] "@bingblue " "@biztrology"
[507] "@blogs-dev " "@bmqb @itsCoder "
[509] "@bomquote " "@Botpy "
[511] "@breadtrip " "@breakdev "
[513] "@brlf-gz " "@browserstack"
[515] "@bupt" "@BUPT"
[517] "@Bytedance" "@ByteDance"
[519] "@ByteDance @Muxi-Studio" "@bytedance-hotsoon"
[521] "@bzy-ai " "@cachemoment "
[523] "@CactusBall " "@Caijijijijijiji"
[525] "@Canaan-Creative " "@cba "
[527] "@CDEFLS" "@chaitin "
[529] "@CHINA-JD @tiglabs" "@chinacourt"
[531] "@chinapexlabs " "@chinese-poetry "
[533] "@CIS2016 " "@CitoryTech "
[535] "@cjfed" "@cloud-ace"
[537] "@Cloud4est " "@cntehang"
[539] "@CoconutIslandStudio" "@codeparkhouston"
[541] "@coderemixer " "@Coding "
[543] "@computer-lab " "@coolspan"
[545] "@CovenantSQL " "@cqmbr @cqlinkoff @open-data-plan "
[547] "@CR" "@cryptomint"
[549] "@CSIRO-enviro-informatics " "@cumtflyingstudio "
[551] "@CVEO Wuhan University " "@Daimler RD/C TI"
[553] "@DanmakuPie " "@DaoCloud"
[555] "@DaoCloud " "@DaoCloud @Apache "
[557] "@DeepNorthAI " "@deepwn "
[559] "@defencedriver " "@demlution "
[561] "@DGeneAI " "@disject "
[563] "@Dispatchr @ServiceNow" "@dopobo"
[565] "@douban" "@Douban"
[567] "@DrPandaLtd " "@duocloud "
[569] "@DXY-F2E " "@easemob "
[571] "@eastern-all-stars" "@EasyHexo "
[573] "@ECNU @NVIDIA" "@eduvo"
[575] "@EigenLab" "@EigenLab "
[577] "@ejoy " "@ele.me"
[579] "@electron " "@eleme "
[581] "@emqx" "@Ericsson"
[583] "@Eros-Engine" "@exacloud"
[585] "@exacloud " "@ExpediaInc "
[587] "@F4ERP" "@Fenlly"
[589] "@flat-dev-ti " "@Forms"
[591] "@fox-one " "@frapsoft "
[593] "@Fusemachines" "@fusioncharts"
[595] "@gansutianqi " "@gaoding-inc"
[597] "@GeekPark " "@GitHubProjectsTosotada "
[599] "@gmfe" "@gmfe @To-Fun "
[601] "@GNYIO" "@godaddy"
[603] "@Golface " "@Google"
[605] "@google " "@goudai-projects "
[607] "@goworks " "@grab"
[609] "@Grab" "@Grab @myteksi "
[611] "@growingio" "@guanghetv "
[613] "@Guazi-inc" "@haici"
[615] "@haolianluo " "@happylifeplat"
[617] "@Hengbo" "@heremaps"
[619] "@HIT" "@HKUST-Aerial-Robotics "
[621] "@homecredit" "@honeybadger8 "
[623] "@huawei" "@Huawei"
[625] "@HumanBible" "@hunterplus.net"
[627] "@huntlabs " "@hupun"
[629] "@hyperledger " "@Hypers "
[631] "@iangeli " "@IBM Research"
[633] "@iboxpay " "@IceBear-666 "
[635] "@iflytek" "@IHSVInc "
[637] "@ikang" "@InBuff "
[639] "@InfinityStudio & @LWL-Networks " "@insightfinder"
[641] "@ioootech " "@iost-official "
[643] "@iQIYI" "@irdeto"
[645] "@itagnjs " "@jcgroup"
[647] "@JDFinance" "@jianshucom "
[649] "@jike-engineering" "@JNUGeek "
[651] "@JoyMoe " "@JumeiRdGroup"
[653] "@Jusot " "@jutasky "
[655] "@JXUT-BST @RGB-TEC" "@k2data"
[657] "@kayac " "@keruyun "
[659] "@Kesci " "@kfw001 "
[661] "@knownsec" "@Kucoin "
[663] "@LambdaInnovation" "@LaurelHome "
[665] "@LearnSolid " "@lianjia-tech"
[667] "@LinkedIn" "@listen-now @SXUOSA @Sele-frontend "
[669] "@LiveLucid " "@lomocoin"
[671] "@lotusflare " "@LVMM-H5 "
[673] "@lvxunDev " "@lyft "
[675] "@maizuo & @sodalife " "@maodouio "
[677] "@maptalks " "@MapTalks "
[679] "@marklogic " "@McGill"
[681] "@MediaKind" "@MeetYouDevs"
[683] "@Meituan" "@Meituan-dianping, Inc"
[685] "@meituan.com" "@Meituan美团"
[687] "@meizu" "@Meowv"
[689] "@mercari" "@metrodata"
[691] "@MicroMOOC " "@microstrategy"
[693] "@mime-mob " "@mindbridge-ai"
[695] "@Miovision " "@mixi-inc "
[697] "@MoerFinance " "@mogu @opensec-cn "
[699] "@Monkee-Boy " "@moqi-ai @ansrlab "
[701] "@moregold " "@MPICP "
[703] "@MSRA" "@MSTechAnLi "
[705] "@multisolution " "@Muxi-Studio "
[707] "@mvc9" "@mycodon"
[709] "@MyScript" "@nactro"
[711] "@Naver China" "@ncuhome"
[713] "@NeToucher" "@NeusoftSEMI "
[715] "@NiceLabs " "@NJUPT"
[717] "@nlitt " "@NO"
[719] "@nodejs 学徒" "@nokia"
[721] "@nomadeducation " "@northdark "
[723] "@northeastern" "@NUAA-Open-Source "
[725] "@nuofe " "@nuxt-community"
[727] "@NVIDIA" "@NVIDIA "
[729] "@nxintech" "@NXT-FE "
[731] "@nytm " "@ojlm "
[733] "@OmenSec @Aurora-SEC " "@OpenNetworking "
[735] "@oracle" "@orzbox "
[737] "@osu-translate-zh" "@Our404 "
[739] "@OurEDA_Lab" "@ours8 "
[741] "@p1cn" "@p1cn "
[743] "@Paypal" "@pcit-ce "
[745] "@PDFE @GoliGoliTV " "@petropub "
[747] "@pinduoduo" "@PingPlusPlus "
[749] "@plaid" "@platformsh "
[751] "@pogon " "@polar-bears"
[753] "@Politiwatch, @PAblueshells, @PAcompsci, & others." "@ponyai "
[755] "@Power2U " "@PPCredit"
[757] "@ppdai" "@Primlo"
[759] "@pyspring" "@qbox "
[761] "@qiakr" "@qibancom "
[763] "@qihoo360" "@Qihoo360"
[765] "@qiniu" "@qiniu @qbox"
[767] "@qk365.com" "@quixey "
[769] "@qunar.com" "@RainMC "
[771] "@rancher " "@rancherlabs "
[773] "@React-Native-Team" "@realidfarm "
[775] "@Redhotminute " "@REDMedis "
[777] "@ringcentral " "@rishiqing "
[779] "@riversearch " "@rngame"
[781] "@rongzhilian" "@RootCluster"
[783] "@runtimeverification " "@RyanxJS "
[785] "@SafetyCulture" "@santaio"
[787] "@saubyte " "@sec-bit"
[789] "@segmentio " "@seuxw "
[791] "@Shapetrace " "@shedaltd "
[793] "@sheenCity" "@ShengQianKuaiBao"
[795] "@ShimoFour " "@Shinetechchina"
[797] "@ShinyFrog" "@ShirasagiMoe "
[799] "@Shopify" "@shuopensourcecommunity "
[801] "@shuquyun" "@SicunStudio "
[803] "@sildevTeam " "@sillybobo "
[805] "@SJTU" "@SJTU-SE "
[807] "@Skylark-Studio " "@skypool-org"
[809] "@Sofihub " "@Soochow University;@IQIYI"
[811] "@sprigs" "@St15IOT "
[813] "@star-mine Inc." "@starrycloud"
[815] "@Strikingly" "@sugar-libraries "
[817] "@superatoms " "@superoneio"
[819] "@superoneio " "@susers "
[821] "@SUSTC" "@synyi "
[823] "@SYSU-MSC-Studio @wechat-miniprogram" "@TALFE "
[825] "@Talkpal" "@tarsocial "
[827] "@tboox & @xmake-io " "@Team-Explorer-Rescue-Robot"
[829] "@team-explorer-rescue-robot " "@teambition"
[831] "@teamhola @nodejs " "@TEamSwifter "
[833] "@Tencent " "@Tencent @QSCTech"
[835] "@tensorspace-team " "@thalmic "
[837] "@The-Orizon @telegram-zhCN " "@thenetcircle "
[839] "@ThinkSpatial " "@thoughtworks "
[841] "@ThoughtWorks Inc" "@Thoughtworks.com"
[843] "@ThoughtWorksInc" "@ti-net "
[845] "@tigerbrokers " "@TINNO-Sugar "
[847] "@Tinwork, @LuluDansMaRue " "@TK"
[849] "@Tomorning" "@Tomorning "
[851] "@TPDT @luainkernel " "@trackingio "
[853] "@transferwise " "@Tubitv "
[855] "@tuhu " "@TUMCREATE"
[857] "@TuSimple" "@TuyaInc"
[859] "@twitchalerts " "@twitter"
[861] "@ucloud" "@UCloud"
[863] "@ucress " "@udesk "
[865] "@UESTC" "@ulb"
[867] "@Uniquestudio" "@UniqueStudio"
[869] "@UniqueStudio " "@UniqueStudio @HUSTFE "
[871] "@Unisound" "@university-of-york"
[873] "@UnknownStudio " "@Uplusware"
[875] "@USTC-Courses " "@ustclug "
[877] "@V2Git " "@VeniiRobot"
[879] "@verydog @cmdboys @sunflower-ui " "@viabtc @coinexcom"
[881] "@vipshop" "@vuese"
[883] "@wanmoe.cn" "@wasmerio"
[885] "@webeyemob" "@weicheche"
[887] "@weidian" "@WhatToEatAtNoonToday "
[889] "@White-Album-Lab" "@whmall.com"
[891] "@Wi-Q " "@Wikimedia | @WoCUG "
[893] "@Wiredcraft " "@Wish @ContextLogic"
[895] "@WoSai " "@wxmagic "
[897] "@XiangWuShuo" "@Xiaomi"
[899] "@XiDeHao" "@XLCW"
[901] "@XMatrixStudio " "@xtTech "
[903] "@xueersi" "@yaochi "
[905] "@YHJ-WEB " "@Yigang-SH "
[907] "@YNUOSA " "@young-studio "
[909] "@yuni-tech " "@yurenio "
[911] "@zaihui @vuejs" "@ZangaiFamily"
[913] "@zendesk " "@ZenMX "
[915] "@Zetyun" "@zhengruioi"
[917] "@zhihu " "@zhiqicloud "
[919] "@Zilliqa" "@ZJU-CC98 "
[921] "@ZJUT" "@zoho"
[923] "@zoom" "@zstackio"
[925] "@ZTO-Express " "@ZZES-ZCDC "
[927] "@zzus " "@众人安"
[929] "@即刻 @itsCoder " "@客如云"
[931] "@小米" "@巧房 @ReactTemplate @qtonecn @ReactChina"
[933] "@极客学院" "@比特大陆 Bitmain"
[935] "@瓜子二手车" "@结婚类网站"
[937] "@铭师堂 @有赞(前)" "*** co.LTD"
[939] "*************" "**************"
[941] "**信息科技" "**科技有限公司"
[943] "/university" "` `` `"
[945] "<img src='' onerror='alert(1)' />" "☺"
[947] "0" "0.0"
[949] "100offer" "1024.engineer"
[951] "1050629507@qq.com" "111"
[953] "111工作室" "115"
[955] "123木头人" "127.0.0.1"
[957] "1476102147@qq.com" "163"
[959] "163.com" "1771882991@qq.com"
[961] "1990" "1KE.CO"
[963] "2015 Computer Science at HHU" "209"
[965] "213" "21cn"
[967] "222" "234234234"
[969] "2345" "2345.com"
[971] "244180439@qq.com" "24好玩"
[973] "263,Inc." "3’s Company"
[975] "3vjia" "3yisu.com"
[977] "404." "5151515151"
[979] "51CTO" "51IDC Inc."
[981] "51job" "51NB"
[983] "58" "58 Group HRG"
[985] "58ganji" "58同城"
[987] "5th" "61koudai.com"
[989] "65370392" "6人游"
[991] "7" "7moor"
[993] "9air" "9fen"
[995] "A great company, very very big . " "A jewelry company"
[997] "Abakus" "ABC360"
[999] "ABChina" "ABCI"
[ reached getOption("max.print") -- omitted 3137 entries ]
# Define the company aggregation function.
#
# Maps one free-text company string to a canonical company label.
# Matching is case-insensitive (the input is upper-cased before matching) and
# the rules are tried in order; the first rule whose pattern is found anywhere
# in the string wins. If no rule matches, the input is returned unchanged.
#
# name: a single company string.
# Returns: the canonical label, or `name` itself when nothing matches.
company_aggregation <- function(name) {
  upper_name <- toupper(name)

  # Ordered (pattern, label) pairs; order matters because the first hit wins.
  rules <- list(
    c("百度|BAIDU|AIDU", "Baidu"),
    c("ENCENT|腾讯|TENCENT", "Tencent"),
    c("LIBABA|淘宝|AOBAO|LIPAY|阿里巴巴|LIYUN|阿里云", "Alibaba"),
    c("JD|京东", "JD"),
    c("ETEASE|网易", "NetEase"),
    c("EITUAN|美团", "MeiTuan"),
    c("YTEDANCE|字节|头条", "ByteDance"),
    c("ELEME|饿了", "Eleme"),
    c("UAWEI|华为", "Huawei"),
    c("DIDI|滴滴|嘀嘀", "DiDi")
  )

  for (rule in rules) {
    if (grepl(rule[1], upper_name)) {
      return(rule[2])
    }
  }

  # No known company recognised: keep the original spelling.
  name
}
# Define the education aggregation function.
#
# Maps one free-text affiliation string to a canonical university name.
# Matching is case-insensitive (the input is upper-cased before matching) and
# the branches are tried in order; the first pattern found anywhere in the
# string wins.
#
# name: a single affiliation string.
# Returns: the canonical university name, or NA when no university is
#   recognised.
#
# NOTE(review): the patterns are deliberately loose substrings, so e.g.
# "HEJIANG" also captures other Zhejiang institutions and "ARBIN" other Harbin
# institutions; tighten them if per-university precision matters.
education_aggregation <- function(name) {
  name <- toupper(name)

  if (grepl("HEJIANG|ZJU|浙江大学|浙大", name)) {
    target_name <- "Zhejiang University"
  } else if (grepl("SINGHUA|清华", name)) {
    target_name <- "Tsinghua University"
  } else if (grepl("SHANGHAI JIAO TONG|SJTU|上海交大|上海交通", name)) {
    target_name <- "Shanghai Jiao Tong University"
  } else if (grepl("UESTC|电子科大|电子科技", name)) {
    target_name <- "University of Electronic Science and Technology of China"
  } else if (grepl("(?<!S)USTC|中科大|中国科学技术", name, perl = TRUE)) {
    # The lookbehind keeps "SUSTC" (a different university) from being
    # counted as USTC.
    target_name <- "University of Science and Technology of China"
  } else if (grepl("FUDAN|复旦", name)) {
    target_name <- "Fudan University"
  } else if (grepl("ARBIN|哈工大|哈尔滨", name)) {
    # A bare "哈" would match any string containing that character
    # (e.g. "哈哈"), so require the full city name or the usual abbreviation.
    target_name <- "Harbin Institute of Technology"
  } else if (grepl("BUPT|北邮|北京邮电", name)) {
    target_name <- "Beijing University of Posts and Telecommunications"
  } else {
    target_name <- NA
  }

  target_name
}
# Aggregating disparate company spellings.
# Map every raw company string to its canonical company label and, where one
# is recognised, its canonical university name. The helper results are passed
# through as.character() so that an NA result still satisfies vapply()'s
# character(1) contract.
agg_companies <- vapply(
  company_info$company,
  function(raw_name) as.character(company_aggregation(raw_name)),
  character(1),
  USE.NAMES = FALSE
)
agg_education <- vapply(
  company_info$company,
  function(raw_name) as.character(education_aggregation(raw_name)),
  character(1),
  USE.NAMES = FALSE
)

# Keep the labels as plain strings; as factors, the NA university labels
# trigger dplyr's "implicit NA" warning when grouping.
company_info_agg <- cbind(
  company_info, agg_companies, agg_education,
  stringsAsFactors = FALSE
)

# Show the top ten companies which have the most developers supporting 996.icu.
# Each row of company_info_agg is one raw spelling that already carries its
# user count, so the per-company total is the SUM of those counts; n() would
# only count how many spellings a company has.
company_info_agg %>%
  group_by(agg_companies) %>%
  summarise(count = sum(count)) %>%
  arrange(desc(count)) %>%
  head(10)

# Show which universities those developers are from.
company_info_agg %>%
  filter(!is.na(agg_education)) %>%
  group_by(agg_education) %>%
  summarise(count = sum(count)) %>%
  arrange(desc(count)) %>%
  head(10)
# Output: Factor `agg_education` contains implicit NA, consider using `forcats::fct_explicit_na`
# Output: NA
#
# Define the function for aggregating the cities.
#
# Maps one free-text location string to a canonical city name.
# Matching is case-insensitive (the input is upper-cased before matching) and
# the branches are tried in order; the first pattern found anywhere in the
# string wins. If no city is recognised, the input is returned unchanged.
#
# name: a single location string.
# Returns: the canonical city name, or `name` itself when nothing matches.
city_aggregation <- function(name) {
  orig_name <- name
  name <- toupper(name)

  # Detect pattern and change the city accordingly.
  if (grepl("EIJING|北京", name)) {
    target_name <- "Beijing"
  } else if (grepl("HANGHAI|上海", name)) {
    target_name <- "Shanghai"
  } else if (grepl("GUANGZHOU|广州", name)) {
    target_name <- "Guangzhou"
  } else if (grepl("(?<![A-Z])HANGZHOU|杭州", name, perl = TRUE)) {
    # The lookbehind rejects names that merely end in "...HANGZHOU", such as
    # Changzhou or Zhangzhou, which are different cities.
    target_name <- "Hangzhou"
  } else if (grepl("HENGDU|成都", name)) {
    target_name <- "Chengdu"
  } else if (grepl("ANJING|南京", name)) {
    target_name <- "Nanjing"
  } else if (grepl("INGAPORE|新加坡", name)) {
    target_name <- "Singapore"
  } else if (grepl("HONG KONG|香港|HK", name)) {
    target_name <- "Hong Kong"
  } else if (grepl("UHAN|武汉", name)) {
    target_name <- "Wuhan"
  } else {
    target_name <- orig_name
  }

  target_name
}
# Number of users per raw location string, most common first. Blank locations
# and the country-only value "China" carry no city information and are dropped.
city_info <- dt_user_cld %>%
  group_by(location) %>%
  summarise(count = n()) %>%
  filter(
    location != "",
    location != "China"
  ) %>%
  arrange(desc(count))

# Map every raw location string to its canonical city name.
agg_cities <- vapply(
  city_info$location,
  function(raw_location) as.character(city_aggregation(raw_location)),
  character(1),
  USE.NAMES = FALSE
)
city_info_agg <- cbind(city_info, agg_cities, stringsAsFactors = FALSE)

# Showing the top ten cities that have the most developers supporting 996.icu.
# Each row of city_info_agg is one raw spelling that already carries its user
# count, so the per-city total is the SUM of those counts; n() would only
# count how many spellings a city has.
city_info_agg %>%
  group_by(agg_cities) %>%
  summarise(count = sum(count)) %>%
  filter(
    agg_cities != "",
    agg_cities != "China"
  ) %>%
  arrange(desc(count)) %>%
  head(10)
# Distribution graph for supporters whose followers, following and public
# repository counts are all at most 50. Three semi-transparent bar layers are
# overlaid on a shared x axis: followers (black), following (red) and public
# repositories (blue).
dist_ggplot <- ggplot(
  filter(dt_user_cld, followers <= 50, following <= 50, public_repos <= 50)
) +
  geom_bar(aes(x = followers), col = "black", fill = "black", alpha = 0.5) +
  geom_bar(aes(x = following), col = "black", fill = "red", alpha = 0.5) +
  geom_bar(aes(x = public_repos), col = "black", fill = "blue", alpha = 0.5)

# Add the title and axis labels, then draw the plot.
dist_ggplot +
  ggtitle("Distribution Plot") +
  labs(
    x = "Followers (Black), Following (Red) and Public Repositories (Blue)",
    y = "Count"
  )